subway_cleaned = read_csv("Data/cleaned_subway_data.csv")
# Importing NYC map
nyc_map = st_read(here::here('NYC', 'nyc.shp'), quiet = TRUE)
nycmap = st_transform(nyc_map, crs = 4326)
Here we investigate features and the overall accessibility of the stations in order to gain insights how accessible NYC subway stations are.
subway_cleaned %>%
ggplot() +
geom_sf(
data = nyc_map, fill = NA
) +
geom_point(
aes(x = station_longitude, y = station_latitude, color = ada),
size = 2.5, alpha = 0.5) +
coord_sf() +
theme_void(base_size = 10) +
theme(legend.position = 'bottom') +
guides(color = guide_legend(
title.position = "top",
override.aes = list(size = 3))) +
scale_color_manual(values = c("FALSE" = "aquamarine3", "TRUE" = "slateblue3")) +
labs(color = "ADA Compliance")
ada compliance ensures that individuals with
disabilities can safely and easily access subway stations. In the plot,
each dot represent a station. We can tell that NYC’s stations do not
perform well on this issue…
subway_cleaned %>%
ggplot() +
geom_sf(
data = nyc_map, fill = NA
) +
geom_point(
aes(x = station_longitude, y = station_latitude, color = free_crossover),
size = 2.5, alpha = 0.5) +
coord_sf() +
theme_void(base_size = 10) +
theme(legend.position = 'bottom') +
guides(color = guide_legend(
title.position = "top",
override.aes = list(size = 3))) +
labs(color = "Free Crossover")
As the most complex subway system in the world, however, NYC Subway
provides free_crossover for most of its stations.With this
understanding of both the shortcomings and strengths in terms of ADA
compliance and free crossovers, we now turn our attention to evaluating
the overall accessibility performance of NYC’s subway stations as a
whole.
# Select the Variables for Clustering
clustering_data <- subway_cleaned %>%
dplyr::select(entrance_type, staffing, ada, free_crossover, station_latitude, station_longitude, station_name)
set.seed(123) # For reproducibility
km_result <- clustering_data %>%
dplyr::select(-station_latitude, -station_longitude, -station_name) %>%
kmodes(modes = 3)
We cluster the overall accessibility in three groups(levels) with the features of accessibility inside the stations.
clustering_data$.cluster <- factor(km_result$cluster)
scenario_counts <- clustering_data %>%
mutate(
scenario = case_when(
ada == "TRUE" & free_crossover == "TRUE" ~ "ADA=TRUE, Free=TRUE",
ada == "FALSE" & free_crossover == "FALSE" ~ "ADA=FALSE, Free=FALSE",
ada == "FALSE" & free_crossover == "TRUE" ~ "ADA=FALSE, Free=TRUE",
TRUE ~ NA_character_
)
) %>%
filter(!is.na(scenario)) %>%
group_by(scenario, .cluster) %>%
summarise(n = n(), .groups = "drop")
plot_ly(
data = scenario_counts,
x = ~scenario,
y = ~n,
color = ~.cluster,
type = "bar"
) %>%
layout(
title = "Count of Stations by Scenario and Cluster",
xaxis = list(title = "Scenario"),
yaxis = list(title = "Count of Stations"),
barmode = "group",
bargap = 0.2
)
From the plot, we can tell the stations with with highest
accessibility is less likely to have neither ada nor
free_crossover, and the lowest accessibility stations vice
versa. Also, in the scenario where the stations only have
free_crossover, the more accessible this station is, the
less likely it will happen. However, most of the stations only have the
basic amenities free_crossover.
# Define a Mode function
Mode <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
cluster_profiles <- clustering_data %>%
group_by(.cluster) %>%
dplyr::select(-station_latitude, -station_longitude) %>%
dplyr::select(station_name, everything()) %>%
summarise(across(everything(), ~ Mode(.x)))
knitr::kable(cluster_profiles)
| .cluster | station_name | entrance_type | staffing | ada | free_crossover |
|---|---|---|---|---|---|
| 1 | 59th St | Stair | FULL | FALSE | TRUE |
| 2 | 25th St | Stair | FULL | FALSE | TRUE |
| 3 | 36th St | Stair | FULL | FALSE | TRUE |
As we display the result of our model, the all of the dominant features tend to be same across the clusters. This result happens due to the un-even distribution of these categorical variables, which can be reflected in our visualization section.
clustering_data <- clustering_data %>%
mutate(
accessibility_level = case_when(
.cluster == 1 ~ "High Accessibility",
.cluster == 2 ~ "Medium Accessibility",
.cluster == 3 ~ "Low Accessibility"
),
accessibility_level = factor(accessibility_level,
levels = c("High Accessibility", "Medium Accessibility", "Low Accessibility"))
)
pal <- leaflet::colorFactor(
palette = c("chartreuse", "darkgoldenrod1", "brown2"), # Adjust colors as needed
domain = clustering_data$accessibility_level
)
leaflet() |>
addTiles() |>
addCircleMarkers(data = clustering_data,
lng = ~station_longitude,
lat = ~station_latitude,
label = ~station_name,
radius = 4,
color = NA,
# color = ~pal(accessibility_level),
fillColor = ~pal(accessibility_level),
stroke = TRUE, fillOpacity = 0.75,
popup = ~paste("Ada:", ada,
"<br> Staffing:", staffing,
"<br> Entrance type:", entrance_type,
"<br> Free crossover:", free_crossover)) |>
addProviderTiles(providers$CartoDB.Positron) |>
addLegend(
"bottomright",
pal = pal,
values = clustering_data$accessibility_level,
title = "Accessibility Level",
opacity = 1
)
Now, let’s take a look at where high and low accesibility station reside in our city. It appears that the borough/area with high concentration of low accessibility stations are Downtown Manhhatan, Bronx, and Downtown Brooklyn.
subway_with_restroom = read_csv("Data/cleaned_subway_restroom_data.csv")
subway_with_restroom <- subway_with_restroom %>%
mutate(
# #convert to logical
restroom_changing_stations_logic = as.logical(restroom_changing_stations),
restroom_status_logic = as.logical(restroom_status),
restroom_accessibility = fct_explicit_na(restroom_accessibility, na_level = "Unknown"),
restroom_open = fct_explicit_na(restroom_open, na_level = "Unknown")
)
# Select the Variables for Clustering
clustering_merged_data <- subway_with_restroom %>%
dplyr::select(entrance_type, staffing, ada, free_crossover, station_latitude, station_longitude, station_name,
restroom_open, restroom_accessibility,restroom_changing_stations_logic, restroom_status_logic)
set.seed(123) # For reproducibility
km_result2 <- clustering_merged_data %>%
dplyr::select(-station_latitude, -station_longitude, -station_name) %>%
klaR::kmodes(modes = 3)
clustering_merged_data$.cluster <- factor(km_result2$cluster)
# Define a Mode function
Mode_for_merged <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
cluster_merged_profiles <- clustering_merged_data %>%
group_by(.cluster) %>%
dplyr::select(-station_latitude, -station_longitude) %>%
dplyr::select(station_name, everything()) %>%
summarise(across(everything(), ~ Mode_for_merged(.x)))
knitr::kable(cluster_merged_profiles)
| .cluster | station_name | entrance_type | staffing | ada | free_crossover | restroom_open | restroom_accessibility | restroom_changing_stations_logic | restroom_status_logic |
|---|---|---|---|---|---|---|---|---|---|
| 1 | 36th St | Stair | FULL | FALSE | TRUE | Year Round | Fully Accessible | FALSE | TRUE |
| 2 | 45th St | Stair | FULL | FALSE | TRUE | Year Round | Unknown | FALSE | TRUE |
| 3 | 25th St | Stair | FULL | FALSE | FALSE | Year Round | Not Accessible | FALSE | TRUE |
clustering_merged_data <- clustering_merged_data %>%
mutate(
accessibility_level = case_when(
.cluster == 1 ~ "High Accessibility",
.cluster == 2 ~ "Medium Accessibility",
.cluster == 3 ~ "Low Accessibility"
)
)
pal <- leaflet::colorFactor(
palette = c("chartreuse", "darkgoldenrod1", "brown2"), # Adjust colors as needed
domain = clustering_merged_data$accessibility_level
)
leaflet() |>
addTiles() |>
addCircleMarkers(data = clustering_merged_data,
lng = ~station_longitude,
lat = ~station_latitude,
label = ~station_name,
radius = 4,
color = NA,
# color = ~pal(accessibility_level),
fillColor = ~pal(accessibility_level),
stroke = TRUE, fillOpacity = 0.75,
popup = ~paste("Ada:", ada,
"<br> Staffing:", staffing,
"<br> Entrance type:", entrance_type,
"<br> Free crossover:", free_crossover)) |>
addProviderTiles(providers$CartoDB.Positron) |>
addLegend(
"bottomright",
pal = pal,
values = clustering_merged_data$accessibility_level,
title = "Accessibility Level",
opacity = 1
)